#delimit;
clear all;
set more off;

capture log using "05_Merge_Grants_Data_and_Chetty_Inequality.log", replace;
/*******************************************************************************************/;
** INPUT: Grants Detail, Zipcode to FIPS, FIPS to Commuting Zone, and Chetty Inequality commuting zone income stats;
** OUTPUT: Clean grantee data set, includes inequality of grantee's geographic region;
** LAST MODIFIED: JAG, 6/21/2021;
** RUN TIME: <5 minutes using Stata 15 on computer with 2 x 2.60GHz processors and 64GB RAM;
/*******************************************************************************************/;

** Generate inequality variables at the commuting zone level.;
** Note: Variable ineq only captures black-white gap for men, which is main driver of black-white gap according to Chetty et al. (2020); 
** If wanted to guess male and female split at 50/50 then average male and female income ranks before differencing that is done in ineq2.;
use "../Raw Data/Chetty Inequality Data/chetty_mobility_data.dta", clear;
gen ineq = kir_white_male_p25 - kir_black_male_p25;
gen ineq2 = ((kir_white_male_p25 + kir_white_female_p25) - (kir_black_male_p25 + kir_black_female_p25)) / 2;

foreach v in ineq ineq2 {;
	egen med_`v' = median(`v');
	gen byte high_`v' = (`v' > med_`v') if !mi(med_`v');
	drop med_`v';
};
save "../Raw Data/Chetty Inequality Data/chetty_inequality.dta", replace;

** Generate county (FIPS) to commuting zone (CZ) crosswalk file from Chetty et al. (2014) data;
pwd;
use "../Raw Data/Chetty Inequality Data/tract_covariates.dta", clear;
gen fips = string(state, "%02.0f") + string(county, "%03.0f");
tostring cz, format("%05.0f") replace;
replace cz = "" if cz == ".";
keep fips cz;
duplicates drop;
isid fips;
order fips, before(cz);
save fips_to_cz, replace;


* Merge: Zip to FIPS (HUD data) and FIPS to CZ (Chetty et al. 2014 data);
use "Zip to FIPS", clear;
keep if inrange(year, 2013, 2018);
drop if mi(zip);
gen fips = string(county, "%05.0f");
merge m:1 fips using fips_to_cz;
keep year qtr zip cz *_ratio;


* ZIP CODES CAN SPAN MULTIPLE COUNTIES. OCCASIONALLY, THEY SPAN MULTIPLE
* COMMUTING ZONES. HANDLE THIS BY USING THE COUNTY WITH THE HIGHEST BUSINESS
* PERCENTAGE FOR THE ZIP CODE, THEN HIGHEST RESIDENTIAL PERCENTAGE, THEN HIGHEST
* "OTHER" PERCENTAGE.;


foreach v of varlist bus_ratio res_ratio oth_ratio {;  //In this order!;
	bys year qtr zip: egen highest_`v' = max(`v');
	bys year qtr zip: gen is_highest_`v' = `v' == highest_`v';
	keep if is_highest_`v';
};
keep year qtr zip cz;
duplicates drop;
* NOTE: There may still be ZIP codes assigned to multiple commuting zones if all;
* of their ratios are the same. Randomly choose which one to keep.;
set seed 1234;
gen double shuffle = runiform();
bys year qtr zip (shuffle): keep if _n==1;
drop shuffle;
* Success;
isid year qtr zip;

bys year zip cz: egen qtrs_in_cz = count(qtr);
bys year zip: egen highest_qtr_count = max(qtrs_in_cz);
bys year zip: gen has_highest_qtr_count = qtrs_in_cz == highest_qtr_count;
keep if has_highest_qtr_count;
keep year qtr zip cz;
duplicates drop year zip cz, force;
* Remaining duplicates had the same number of quarters in the year in each county.;
* Keep the latest one;
bys year zip (qtr): keep if _n==_N;
* Success;
isid year zip;

keep zip year cz;
tostring zip, format("%05.0f") replace;

* Go from yearly to constant ZIP-CZ pair;
bys zip cz: egen yrs_in_cz = count(year);
bys year zip: egen highest_yr_count = max(yrs_in_cz);
bys year zip: gen has_highest_yr_count = yrs_in_cz == highest_yr_count;
keep if has_highest_yr_count;
keep year zip cz;
duplicates drop zip cz, force;

bys zip (year): keep if _n==_N;
isid zip;
keep zip cz;
save "Zip to CZ.dta", replace;


* Now do the merge.;
* Note: Variable 'zip' is the grantee's ZIP code.;

use "Dom Grants Detail.dta", clear;
ren zip zip_raw;
gen zip = substr(zip_raw, 1, 5);
* merge m:1 year zip using zip_to_cz;
merge m:1 zip using "Zip to CZ.dta";
drop if _merge == 2;
drop _merge;

destring cz, replace;
merge m:1 cz using "../Raw Data/Chetty Inequality Data/chetty_inequality.dta";
drop if _merge == 2;
drop _merge;


save "Grants detail with inequality.dta", replace;
erase fips_to_cz.dta;


log close;
